##### Load packages (startup messages shown below)

## Loading required package: lattice
## Loading required package: ggplot2
## 
## Attaching package: 'dplyr'
## The following objects are masked from 'package:stats':
## 
##     filter, lag
## The following objects are masked from 'package:base':
## 
##     intersect, setdiff, setequal, union
## Loaded gbm 2.1.5
## Loading required package: gplots
## 
## Attaching package: 'gplots'
## The following object is masked from 'package:stats':
## 
##     lowess

##read in data

# Load the call measurements and derive a binary target column:
# Species_no is 1 for rows whose Species is "Grey" and 0 for all other rows.
C <- read.csv("Calls.csv")
C$Species_no <- 0
igrey <- which(C$Species == "Grey")
C$Species_no[igrey] <- 1

# Drop the Name and Species columns; every other column (including the new
# Species_no) is retained.
rm <- c("Name", "Species")
keep <- setdiff(names(C), rm)
C <- C[, keep]

# Write the prepared data frame to disk, then read it back; load() restores
# the object under the same name, `C`.
save(C, file = "C.Rdata")
load("C.Rdata")
# Build the model formula: Species_no is the response and every other column
# of C is a predictor.
label_col <- which(names(C) == "Species_no")

# Fail early with a clear error if the response column is absent, instead of
# letting as.formula() fail later on a malformed string.
stopifnot(length(label_col) == 1L)

# Take predictor names from the column-name vector itself. Subsetting the data
# frame first (C[, -label_col]) collapses to an unnamed vector when only one
# predictor column remains, which would leave the formula without terms.
names <- colnames(C)[-label_col]
y_col <- label_col

model <- as.formula(paste0(colnames(C)[y_col], "~",
                           paste(names, collapse = "+")))
model
## Species_no ~ No_of_harmonics + Agg_Entropy + Avg_Entropy + Avg_Power + 
##     BW_90. + Center_Freq + Center_Time + Center_Time_Rel + Delta_Freq + 
##     Delta_Time + Dur_90. + Energy + Freq_5. + Freq_5.._Rel + 
##     Freq_95. + Freq_95._Rel + IQR_BW + IQR_Dur + Inband_Power + 
##     Max_Entropy + Max_Freq + Max_Power + Max_Time + Min_Entropy + 
##     Peak_Freq + PFC_Max_Freq + PFC_Max_Slope + PFC_Min_Freq + 
##     PFC_Num_Inf_Pts + Peak_Power + Peak_Time + Peak_Time_Relative + 
##     Sample_Length + Time_5. + Time_5._Rel + Time_95. + Time_95._Rel
# Split the data 80/20 into training and test sets.
# Fix the RNG seed (the value itself is arbitrary) so that the random split,
# and the stochastic steps that follow it, can be reproduced between runs.
set.seed(1234)

# Pass the outcome as a factor so that sampling is done within each class and
# the class proportions are preserved in both sets. A numeric 0/1 vector is
# instead binned by percentiles, which typically puts both classes in the same
# bin and results in an unstratified split.
DP <- createDataPartition(y = factor(C$Species_no),
                          p = 0.8,
                          list = FALSE)

# Rows selected by DP form the training set; all remaining rows are the test
# set.
Train <- C[DP, ]
Test <- C[-DP, ]

# Record the start time before fitting.
# NOTE(review): presumably compared against a later proc.time() call to time
# the fit; that call is not visible in this part of the file.
ptm <- proc.time()

# Boosting hyper-parameters, kept as variables so they can be reused.
n.trees <- 15000
shrinkage <- 0.001 # final version should be 0.001
cv.folds <- 10 # final version should be 10

# Fit a gradient boosted model for the binary (0/1) outcome on the training
# set, with cross-validation.
gbmtest <- gbm(
  formula = model,
  data = Train,
  distribution = "bernoulli", # 0/1 response
  n.trees = n.trees,
  shrinkage = shrinkage,
  interaction.depth = 3,
  bag.fraction = 0.50,
  train.fraction = 1, # no held-out validation rows, so ValidDeviance is nan
  n.minobsinnode = 5,
  cv.folds = cv.folds,
  keep.data = TRUE,
  verbose = TRUE, # prints the iteration log
  n.cores = NULL # let gbm choose the number of cores
)
## Iter   TrainDeviance   ValidDeviance   StepSize   Improve
##      1        1.3802             nan     0.0010    0.0002
##      2        1.3792             nan     0.0010    0.0003
##      3        1.3784             nan     0.0010    0.0001
##      4        1.3776             nan     0.0010    0.0000
##      5        1.3767             nan     0.0010    0.0003
##      6        1.3762             nan     0.0010   -0.0001
##      7        1.3754             nan     0.0010    0.0002
##      8        1.3746             nan     0.0010    0.0002
##      9        1.3739             nan     0.0010    0.0001
##     10        1.3731             nan     0.0010   -0.0000
##     20        1.3656             nan     0.0010    0.0000
##     40        1.3503             nan     0.0010    0.0002
##     60        1.3350             nan     0.0010    0.0001
##     80        1.3197             nan     0.0010    0.0002
##    100        1.3055             nan     0.0010    0.0002
##    120        1.2909             nan     0.0010    0.0001
##    140        1.2784             nan     0.0010   -0.0002
##    160        1.2645             nan     0.0010    0.0001
##    180        1.2516             nan     0.0010   -0.0000
##    200        1.2385             nan     0.0010    0.0002
##    220        1.2255             nan     0.0010    0.0001
##    240        1.2118             nan     0.0010    0.0000
##    260        1.1992             nan     0.0010    0.0002
##    280        1.1866             nan     0.0010    0.0000
##    300        1.1741             nan     0.0010    0.0002
##    320        1.1617             nan     0.0010    0.0001
##    340        1.1501             nan     0.0010    0.0002
##    360        1.1383             nan     0.0010   -0.0000
##    380        1.1269             nan     0.0010    0.0001
##    400        1.1160             nan     0.0010    0.0000
##    420        1.1056             nan     0.0010    0.0001
##    440        1.0941             nan     0.0010    0.0001
##    460        1.0848             nan     0.0010   -0.0000
##    480        1.0739             nan     0.0010    0.0003
##    500        1.0642             nan     0.0010   -0.0000
##    520        1.0542             nan     0.0010   -0.0001
##    540        1.0441             nan     0.0010    0.0001
##    560        1.0337             nan     0.0010   -0.0000
##    580        1.0245             nan     0.0010    0.0001
##    600        1.0152             nan     0.0010    0.0002
##    620        1.0061             nan     0.0010    0.0000
##    640        0.9962             nan     0.0010    0.0001
##    660        0.9867             nan     0.0010    0.0000
##    680        0.9776             nan     0.0010    0.0001
##    700        0.9688             nan     0.0010   -0.0000
##    720        0.9604             nan     0.0010   -0.0000
##    740        0.9512             nan     0.0010    0.0000
##    760        0.9422             nan     0.0010    0.0001
##    780        0.9341             nan     0.0010    0.0000
##    800        0.9254             nan     0.0010    0.0001
##    820        0.9168             nan     0.0010   -0.0000
##    840        0.9090             nan     0.0010   -0.0000
##    860        0.9013             nan     0.0010    0.0001
##    880        0.8942             nan     0.0010    0.0001
##    900        0.8860             nan     0.0010    0.0000
##    920        0.8782             nan     0.0010    0.0001
##    940        0.8705             nan     0.0010    0.0000
##    960        0.8626             nan     0.0010   -0.0000
##    980        0.8554             nan     0.0010   -0.0000
##   1000        0.8482             nan     0.0010   -0.0001
##   1020        0.8409             nan     0.0010   -0.0002
##   1040        0.8340             nan     0.0010   -0.0000
##   1060        0.8275             nan     0.0010   -0.0000
##   1080        0.8208             nan     0.0010    0.0000
##   1100        0.8137             nan     0.0010    0.0001
##   1120        0.8071             nan     0.0010    0.0000
##   1140        0.8006             nan     0.0010    0.0000
##   1160        0.7941             nan     0.0010   -0.0000
##   1180        0.7874             nan     0.0010   -0.0001
##   1200        0.7811             nan     0.0010    0.0000
##   1220        0.7753             nan     0.0010   -0.0000
##   1240        0.7690             nan     0.0010    0.0000
##   1260        0.7627             nan     0.0010    0.0001
##   1280        0.7568             nan     0.0010    0.0000
##   1300        0.7504             nan     0.0010   -0.0000
##   1320        0.7446             nan     0.0010   -0.0001
##   1340        0.7392             nan     0.0010    0.0000
##   1360        0.7332             nan     0.0010   -0.0001
##   1380        0.7276             nan     0.0010   -0.0000
##   1400        0.7215             nan     0.0010   -0.0002
##   1420        0.7159             nan     0.0010    0.0000
##   1440        0.7105             nan     0.0010   -0.0001
##   1460        0.7050             nan     0.0010    0.0001
##   1480        0.6996             nan     0.0010   -0.0000
##   1500        0.6951             nan     0.0010   -0.0001
##   1520        0.6896             nan     0.0010    0.0000
##   1540        0.6840             nan     0.0010    0.0000
##   1560        0.6787             nan     0.0010   -0.0000
##   1580        0.6736             nan     0.0010   -0.0001
##   1600        0.6687             nan     0.0010    0.0000
##   1620        0.6635             nan     0.0010   -0.0000
##   1640        0.6584             nan     0.0010   -0.0000
##   1660        0.6530             nan     0.0010   -0.0000
##   1680        0.6481             nan     0.0010   -0.0001
##   1700        0.6435             nan     0.0010    0.0000
##   1720        0.6388             nan     0.0010   -0.0001
##   1740        0.6339             nan     0.0010   -0.0001
##   1760        0.6287             nan     0.0010   -0.0001
##   1780        0.6241             nan     0.0010    0.0000
##   1800        0.6194             nan     0.0010    0.0001
##   1820        0.6150             nan     0.0010    0.0000
##   1840        0.6104             nan     0.0010    0.0000
##   1860        0.6059             nan     0.0010   -0.0000
##   1880        0.6012             nan     0.0010   -0.0000
##   1900        0.5966             nan     0.0010   -0.0001
##   1920        0.5919             nan     0.0010    0.0001
##   1940        0.5879             nan     0.0010   -0.0000
##   1960        0.5838             nan     0.0010   -0.0000
##   1980        0.5795             nan     0.0010   -0.0000
##   2000        0.5757             nan     0.0010    0.0001
##   2020        0.5716             nan     0.0010    0.0000
##   2040        0.5677             nan     0.0010   -0.0001
##   2060        0.5636             nan     0.0010   -0.0001
##   2080        0.5596             nan     0.0010    0.0000
##   2100        0.5553             nan     0.0010    0.0000
##   2120        0.5511             nan     0.0010   -0.0000
##   2140        0.5475             nan     0.0010   -0.0000
##   2160        0.5434             nan     0.0010   -0.0000
##   2180        0.5394             nan     0.0010   -0.0000
##   2200        0.5355             nan     0.0010   -0.0001
##   2220        0.5318             nan     0.0010    0.0000
##   2240        0.5283             nan     0.0010   -0.0001
##   2260        0.5246             nan     0.0010   -0.0000
##   2280        0.5207             nan     0.0010   -0.0000
##   2300        0.5170             nan     0.0010   -0.0001
##   2320        0.5136             nan     0.0010   -0.0000
##   2340        0.5096             nan     0.0010   -0.0000
##   2360        0.5062             nan     0.0010   -0.0000
##   2380        0.5027             nan     0.0010    0.0000
##   2400        0.4995             nan     0.0010   -0.0000
##   2420        0.4960             nan     0.0010   -0.0001
##   2440        0.4925             nan     0.0010   -0.0001
##   2460        0.4890             nan     0.0010   -0.0001
##   2480        0.4856             nan     0.0010    0.0000
##   2500        0.4820             nan     0.0010   -0.0000
##   2520        0.4788             nan     0.0010   -0.0001
##   2540        0.4759             nan     0.0010   -0.0000
##   2560        0.4724             nan     0.0010   -0.0000
##   2580        0.4695             nan     0.0010   -0.0000
##   2600        0.4661             nan     0.0010   -0.0001
##   2620        0.4628             nan     0.0010   -0.0000
##   2640        0.4596             nan     0.0010   -0.0000
##   2660        0.4565             nan     0.0010    0.0000
##   2680        0.4532             nan     0.0010   -0.0000
##   2700        0.4497             nan     0.0010   -0.0000
##   2720        0.4468             nan     0.0010   -0.0000
##   2740        0.4439             nan     0.0010   -0.0000
##   2760        0.4408             nan     0.0010    0.0000
##   2780        0.4379             nan     0.0010   -0.0000
##   2800        0.4350             nan     0.0010   -0.0001
##   2820        0.4318             nan     0.0010    0.0000
##   2840        0.4289             nan     0.0010   -0.0001
##   2860        0.4262             nan     0.0010   -0.0000
##   2880        0.4236             nan     0.0010   -0.0001
##   2900        0.4212             nan     0.0010    0.0000
##   2920        0.4184             nan     0.0010   -0.0000
##   2940        0.4158             nan     0.0010   -0.0000
##   2960        0.4130             nan     0.0010   -0.0000
##   2980        0.4106             nan     0.0010   -0.0001
##   3000        0.4081             nan     0.0010   -0.0001
##   3020        0.4055             nan     0.0010   -0.0000
##   3040        0.4029             nan     0.0010   -0.0000
##   3060        0.4002             nan     0.0010   -0.0001
##   3080        0.3974             nan     0.0010    0.0000
##   3100        0.3951             nan     0.0010   -0.0000
##   3120        0.3926             nan     0.0010   -0.0001
##   3140        0.3902             nan     0.0010   -0.0000
##   3160        0.3875             nan     0.0010   -0.0000
##   3180        0.3849             nan     0.0010   -0.0001
##   3200        0.3823             nan     0.0010   -0.0000
##   3220        0.3799             nan     0.0010   -0.0000
##   3240        0.3773             nan     0.0010   -0.0000
##   3260        0.3749             nan     0.0010    0.0000
##   3280        0.3723             nan     0.0010   -0.0000
##   3300        0.3698             nan     0.0010   -0.0000
##   3320        0.3674             nan     0.0010   -0.0000
##   3340        0.3649             nan     0.0010   -0.0000
##   3360        0.3625             nan     0.0010   -0.0000
##   3380        0.3601             nan     0.0010   -0.0000
##   3400        0.3581             nan     0.0010   -0.0001
##   3420        0.3561             nan     0.0010   -0.0001
##   3440        0.3538             nan     0.0010   -0.0000
##   3460        0.3518             nan     0.0010   -0.0001
##   3480        0.3493             nan     0.0010    0.0000
##   3500        0.3470             nan     0.0010    0.0000
##   3520        0.3448             nan     0.0010   -0.0001
##   3540        0.3426             nan     0.0010   -0.0000
##   3560        0.3406             nan     0.0010    0.0000
##   3580        0.3383             nan     0.0010   -0.0000
##   3600        0.3362             nan     0.0010    0.0000
##   3620        0.3342             nan     0.0010   -0.0000
##   3640        0.3323             nan     0.0010   -0.0000
##   3660        0.3302             nan     0.0010   -0.0000
##   3680        0.3283             nan     0.0010   -0.0000
##   3700        0.3261             nan     0.0010   -0.0000
##   3720        0.3239             nan     0.0010   -0.0001
##   3740        0.3218             nan     0.0010   -0.0000
##   3760        0.3198             nan     0.0010   -0.0000
##   3780        0.3176             nan     0.0010    0.0000
##   3800        0.3155             nan     0.0010   -0.0000
##   3820        0.3136             nan     0.0010   -0.0000
##   3840        0.3117             nan     0.0010   -0.0000
##   3860        0.3095             nan     0.0010   -0.0000
##   3880        0.3077             nan     0.0010   -0.0000
##   3900        0.3055             nan     0.0010   -0.0000
##   3920        0.3034             nan     0.0010    0.0000
##   3940        0.3014             nan     0.0010   -0.0000
##   3960        0.2995             nan     0.0010   -0.0000
##   3980        0.2976             nan     0.0010   -0.0001
##   4000        0.2959             nan     0.0010   -0.0000
##   4020        0.2940             nan     0.0010   -0.0001
##   4040        0.2920             nan     0.0010    0.0000
##   4060        0.2901             nan     0.0010   -0.0000
##   4080        0.2884             nan     0.0010   -0.0000
##   4100        0.2870             nan     0.0010   -0.0000
##   4120        0.2852             nan     0.0010    0.0000
##   4140        0.2834             nan     0.0010   -0.0000
##   4160        0.2816             nan     0.0010   -0.0000
##   4180        0.2799             nan     0.0010   -0.0000
##   4200        0.2778             nan     0.0010   -0.0000
##   4220        0.2762             nan     0.0010   -0.0000
##   4240        0.2744             nan     0.0010   -0.0000
##   4260        0.2728             nan     0.0010   -0.0000
##   4280        0.2713             nan     0.0010   -0.0000
##   4300        0.2697             nan     0.0010   -0.0000
##   4320        0.2683             nan     0.0010   -0.0000
##   4340        0.2667             nan     0.0010   -0.0000
##   4360        0.2652             nan     0.0010   -0.0000
##   4380        0.2635             nan     0.0010   -0.0000
##   4400        0.2619             nan     0.0010   -0.0000
##   4420        0.2603             nan     0.0010   -0.0000
##   4440        0.2587             nan     0.0010   -0.0000
##   4460        0.2571             nan     0.0010   -0.0000
##   4480        0.2556             nan     0.0010   -0.0000
##   4500        0.2540             nan     0.0010   -0.0000
##   4520        0.2524             nan     0.0010   -0.0000
##   4540        0.2509             nan     0.0010    0.0000
##   4560        0.2492             nan     0.0010   -0.0000
##   4580        0.2480             nan     0.0010   -0.0000
##   4600        0.2464             nan     0.0010   -0.0000
##   4620        0.2449             nan     0.0010    0.0000
##   4640        0.2435             nan     0.0010   -0.0000
##   4660        0.2420             nan     0.0010   -0.0000
##   4680        0.2405             nan     0.0010   -0.0000
##   4700        0.2391             nan     0.0010   -0.0000
##   4720        0.2375             nan     0.0010    0.0000
##   4740        0.2363             nan     0.0010   -0.0000
##   4760        0.2351             nan     0.0010   -0.0000
##   4780        0.2338             nan     0.0010   -0.0000
##   4800        0.2324             nan     0.0010   -0.0000
##   4820        0.2309             nan     0.0010   -0.0000
##   4840        0.2295             nan     0.0010    0.0000
##   4860        0.2281             nan     0.0010   -0.0000
##   4880        0.2267             nan     0.0010   -0.0000
##   4900        0.2251             nan     0.0010   -0.0000
##   4920        0.2238             nan     0.0010   -0.0000
##   4940        0.2226             nan     0.0010   -0.0001
##   4960        0.2213             nan     0.0010   -0.0000
##   4980        0.2200             nan     0.0010   -0.0000
##   5000        0.2186             nan     0.0010   -0.0000
##   5020        0.2173             nan     0.0010   -0.0000
##   5040        0.2161             nan     0.0010   -0.0000
##   5060        0.2146             nan     0.0010   -0.0000
##   5080        0.2133             nan     0.0010   -0.0000
##   5100        0.2120             nan     0.0010   -0.0000
##   5120        0.2105             nan     0.0010   -0.0000
##   5140        0.2093             nan     0.0010   -0.0000
##   5160        0.2081             nan     0.0010   -0.0000
##   5180        0.2070             nan     0.0010   -0.0000
##   5200        0.2057             nan     0.0010   -0.0000
##   5220        0.2046             nan     0.0010   -0.0000
##   5240        0.2035             nan     0.0010   -0.0000
##   5260        0.2023             nan     0.0010   -0.0000
##   5280        0.2011             nan     0.0010   -0.0000
##   5300        0.2002             nan     0.0010   -0.0000
##   5320        0.1991             nan     0.0010   -0.0000
##   5340        0.1980             nan     0.0010   -0.0000
##   5360        0.1966             nan     0.0010   -0.0000
##   5380        0.1954             nan     0.0010   -0.0000
##   5400        0.1942             nan     0.0010   -0.0000
##   5420        0.1930             nan     0.0010   -0.0000
##   5440        0.1919             nan     0.0010   -0.0000
##   5460        0.1909             nan     0.0010   -0.0000
##   5480        0.1898             nan     0.0010   -0.0000
##   5500        0.1887             nan     0.0010   -0.0000
##   5520        0.1876             nan     0.0010   -0.0000
##   5540        0.1867             nan     0.0010   -0.0000
##   5560        0.1857             nan     0.0010   -0.0000
##   5580        0.1846             nan     0.0010   -0.0000
##   5600        0.1835             nan     0.0010   -0.0000
##   5620        0.1825             nan     0.0010   -0.0000
##   5640        0.1814             nan     0.0010   -0.0000
##   5660        0.1803             nan     0.0010    0.0000
##   5680        0.1793             nan     0.0010   -0.0000
##   5700        0.1781             nan     0.0010   -0.0000
##   5720        0.1772             nan     0.0010   -0.0000
##   5740        0.1762             nan     0.0010   -0.0000
##   5760        0.1751             nan     0.0010   -0.0000
##   5780        0.1741             nan     0.0010   -0.0000
##   5800        0.1730             nan     0.0010   -0.0000
##   5820        0.1719             nan     0.0010   -0.0000
##   5840        0.1710             nan     0.0010   -0.0000
##   5860        0.1701             nan     0.0010   -0.0000
##   5880        0.1693             nan     0.0010   -0.0000
##   5900        0.1682             nan     0.0010   -0.0000
##   5920        0.1672             nan     0.0010   -0.0000
##   5940        0.1663             nan     0.0010   -0.0000
##   5960        0.1654             nan     0.0010   -0.0000
##   5980        0.1645             nan     0.0010   -0.0000
##   6000        0.1636             nan     0.0010   -0.0000
##   6020        0.1627             nan     0.0010   -0.0000
##   6040        0.1618             nan     0.0010   -0.0000
##   6060        0.1608             nan     0.0010   -0.0000
##   6080        0.1601             nan     0.0010   -0.0000
##   6100        0.1592             nan     0.0010   -0.0000
##   6120        0.1583             nan     0.0010   -0.0000
##   6140        0.1574             nan     0.0010   -0.0000
##   6160        0.1564             nan     0.0010   -0.0000
##   6180        0.1556             nan     0.0010   -0.0000
##   6200        0.1546             nan     0.0010   -0.0000
##   6220        0.1537             nan     0.0010   -0.0000
##   6240        0.1529             nan     0.0010   -0.0000
##   6260        0.1520             nan     0.0010   -0.0000
##   6280        0.1511             nan     0.0010   -0.0000
##   6300        0.1503             nan     0.0010    0.0000
##   6320        0.1495             nan     0.0010    0.0000
##   6340        0.1485             nan     0.0010   -0.0000
##   6360        0.1477             nan     0.0010   -0.0000
##   6380        0.1468             nan     0.0010    0.0000
##   6400        0.1460             nan     0.0010   -0.0000
##   6420        0.1452             nan     0.0010   -0.0000
##   6440        0.1444             nan     0.0010    0.0000
##   6460        0.1435             nan     0.0010   -0.0000
##   6480        0.1426             nan     0.0010   -0.0000
##   6500        0.1417             nan     0.0010    0.0000
##   6520        0.1410             nan     0.0010   -0.0000
##   6540        0.1402             nan     0.0010   -0.0000
##   6560        0.1395             nan     0.0010   -0.0000
##   6580        0.1387             nan     0.0010   -0.0000
##   6600        0.1380             nan     0.0010   -0.0000
##   6620        0.1371             nan     0.0010   -0.0000
##   6640        0.1364             nan     0.0010   -0.0000
##   6660        0.1356             nan     0.0010   -0.0000
##   6680        0.1349             nan     0.0010   -0.0000
##   6700        0.1342             nan     0.0010   -0.0000
##   6720        0.1335             nan     0.0010   -0.0000
##   6740        0.1327             nan     0.0010   -0.0000
##   6760        0.1319             nan     0.0010   -0.0000
##   6780        0.1311             nan     0.0010   -0.0000
##   6800        0.1304             nan     0.0010   -0.0000
##   6820        0.1297             nan     0.0010   -0.0000
##   6840        0.1290             nan     0.0010   -0.0000
##   6860        0.1283             nan     0.0010   -0.0000
##   6880        0.1277             nan     0.0010   -0.0000
##   6900        0.1269             nan     0.0010    0.0000
##   6920        0.1262             nan     0.0010   -0.0000
##   6940        0.1255             nan     0.0010   -0.0000
##   6960        0.1247             nan     0.0010   -0.0000
##   6980        0.1239             nan     0.0010    0.0000
##   7000        0.1231             nan     0.0010   -0.0000
##   7020        0.1225             nan     0.0010   -0.0000
##   7040        0.1219             nan     0.0010   -0.0000
##   7060        0.1212             nan     0.0010   -0.0000
##   7080        0.1206             nan     0.0010   -0.0000
##   7100        0.1199             nan     0.0010   -0.0000
##   7120        0.1193             nan     0.0010    0.0000
##   7140        0.1186             nan     0.0010   -0.0000
##   7160        0.1180             nan     0.0010   -0.0000
##   7180        0.1173             nan     0.0010   -0.0000
##   7200        0.1167             nan     0.0010   -0.0000
##   7220        0.1161             nan     0.0010   -0.0000
##   7240        0.1155             nan     0.0010   -0.0000
##   7260        0.1148             nan     0.0010   -0.0000
##   7280        0.1142             nan     0.0010   -0.0000
##   7300        0.1137             nan     0.0010   -0.0000
##   7320        0.1130             nan     0.0010   -0.0000
##   7340        0.1123             nan     0.0010    0.0000
##   7360        0.1117             nan     0.0010   -0.0000
##   7380        0.1110             nan     0.0010   -0.0000
##   7400        0.1104             nan     0.0010   -0.0000
##   7420        0.1097             nan     0.0010    0.0000
##   7440        0.1091             nan     0.0010   -0.0000
##   7460        0.1085             nan     0.0010   -0.0000
##   7480        0.1079             nan     0.0010    0.0000
##   7500        0.1073             nan     0.0010   -0.0000
##   7520        0.1067             nan     0.0010   -0.0000
##   7540        0.1062             nan     0.0010   -0.0000
##   7560        0.1056             nan     0.0010   -0.0000
##   7580        0.1050             nan     0.0010   -0.0000
##   7600        0.1045             nan     0.0010   -0.0000
##   7620        0.1038             nan     0.0010   -0.0000
##   7640        0.1032             nan     0.0010   -0.0000
##   7660        0.1027             nan     0.0010   -0.0000
##   7680        0.1021             nan     0.0010    0.0000
##   7700        0.1015             nan     0.0010   -0.0000
##   7720        0.1010             nan     0.0010   -0.0000
##   7740        0.1004             nan     0.0010    0.0000
##   7760        0.0999             nan     0.0010   -0.0000
##   7780        0.0994             nan     0.0010   -0.0000
##   7800        0.0989             nan     0.0010   -0.0000
##   7820        0.0984             nan     0.0010   -0.0000
##   7840        0.0979             nan     0.0010   -0.0000
##   7860        0.0973             nan     0.0010   -0.0000
##   7880        0.0969             nan     0.0010   -0.0000
##   7900        0.0964             nan     0.0010   -0.0000
##   7920        0.0958             nan     0.0010   -0.0000
##   7940        0.0954             nan     0.0010   -0.0000
##   7960        0.0948             nan     0.0010    0.0000
##   7980        0.0943             nan     0.0010   -0.0000
##   8000        0.0938             nan     0.0010   -0.0000
##   8020        0.0932             nan     0.0010    0.0000
##   8040        0.0927             nan     0.0010   -0.0000
##   8060        0.0922             nan     0.0010    0.0000
##   8080        0.0917             nan     0.0010    0.0000
##   8100        0.0911             nan     0.0010   -0.0000
##   8120        0.0907             nan     0.0010   -0.0000
##   8140        0.0902             nan     0.0010   -0.0000
##   8160        0.0897             nan     0.0010   -0.0000
##   8180        0.0893             nan     0.0010   -0.0000
##   8200        0.0887             nan     0.0010    0.0000
##   8220        0.0883             nan     0.0010   -0.0000
##   8240        0.0878             nan     0.0010   -0.0000
##   8260        0.0873             nan     0.0010   -0.0000
##   8280        0.0869             nan     0.0010   -0.0000
##   8300        0.0864             nan     0.0010   -0.0000
##   8320        0.0860             nan     0.0010   -0.0000
##   8340        0.0857             nan     0.0010   -0.0000
##   8360        0.0852             nan     0.0010   -0.0000
##   8380        0.0848             nan     0.0010   -0.0000
##   8400        0.0843             nan     0.0010   -0.0000
##   8420        0.0839             nan     0.0010   -0.0000
##   8440        0.0835             nan     0.0010   -0.0000
##   8460        0.0831             nan     0.0010   -0.0000
##   8480        0.0826             nan     0.0010   -0.0000
##   8500        0.0822             nan     0.0010   -0.0000
##   8520        0.0818             nan     0.0010   -0.0000
##   8540        0.0814             nan     0.0010   -0.0000
##   8560        0.0810             nan     0.0010   -0.0000
##   8580        0.0806             nan     0.0010   -0.0000
##   8600        0.0801             nan     0.0010   -0.0000
##   8620        0.0796             nan     0.0010   -0.0000
##   8640        0.0792             nan     0.0010   -0.0000
##   8660        0.0788             nan     0.0010   -0.0000
##   8680        0.0784             nan     0.0010   -0.0000
##   8700        0.0780             nan     0.0010   -0.0000
##   8720        0.0775             nan     0.0010   -0.0000
##   8740        0.0771             nan     0.0010    0.0000
##   8760        0.0767             nan     0.0010   -0.0000
##   8780        0.0763             nan     0.0010   -0.0000
##   8800        0.0759             nan     0.0010   -0.0000
##   8820        0.0755             nan     0.0010    0.0000
##   8840        0.0750             nan     0.0010   -0.0000
##   8860        0.0747             nan     0.0010   -0.0000
##   8880        0.0743             nan     0.0010   -0.0000
##   8900        0.0739             nan     0.0010   -0.0000
##   8920        0.0735             nan     0.0010   -0.0000
##   8940        0.0732             nan     0.0010   -0.0000
##   8960        0.0728             nan     0.0010   -0.0000
##   8980        0.0724             nan     0.0010   -0.0000
##   9000        0.0720             nan     0.0010   -0.0000
##   9020        0.0717             nan     0.0010   -0.0000
##   9040        0.0713             nan     0.0010   -0.0000
##   9060        0.0710             nan     0.0010   -0.0000
##   9080        0.0706             nan     0.0010   -0.0000
##   9100        0.0702             nan     0.0010   -0.0000
##   9120        0.0698             nan     0.0010   -0.0000
##   9140        0.0695             nan     0.0010   -0.0000
##   9160        0.0692             nan     0.0010   -0.0000
##   9180        0.0688             nan     0.0010   -0.0000
##   9200        0.0684             nan     0.0010   -0.0000
##   9220        0.0680             nan     0.0010   -0.0000
##   9240        0.0676             nan     0.0010    0.0000
##   9260        0.0673             nan     0.0010   -0.0000
##   9280        0.0669             nan     0.0010   -0.0000
##   9300        0.0666             nan     0.0010   -0.0000
##   9320        0.0662             nan     0.0010   -0.0000
##   9340        0.0659             nan     0.0010   -0.0000
##   9360        0.0655             nan     0.0010   -0.0000
##   9380        0.0652             nan     0.0010   -0.0000
##   9400        0.0649             nan     0.0010   -0.0000
##   9420        0.0646             nan     0.0010   -0.0000
##   9440        0.0643             nan     0.0010   -0.0000
##   9460        0.0640             nan     0.0010   -0.0000
##   9480        0.0636             nan     0.0010   -0.0000
##   9500        0.0633             nan     0.0010   -0.0000
##   9520        0.0630             nan     0.0010   -0.0000
##   9540        0.0626             nan     0.0010   -0.0000
##   9560        0.0623             nan     0.0010   -0.0000
##   9580        0.0620             nan     0.0010   -0.0000
##   9600        0.0617             nan     0.0010   -0.0000
##   9620        0.0614             nan     0.0010   -0.0000
##   9640        0.0612             nan     0.0010   -0.0000
##   9660        0.0609             nan     0.0010   -0.0000
##   9680        0.0605             nan     0.0010   -0.0000
##   9700        0.0602             nan     0.0010   -0.0000
##   9720        0.0599             nan     0.0010   -0.0000
##   9740        0.0596             nan     0.0010   -0.0000
##   9760        0.0592             nan     0.0010   -0.0000
##   9780        0.0589             nan     0.0010   -0.0000
##   9800        0.0586             nan     0.0010   -0.0000
##   9820        0.0582             nan     0.0010   -0.0000
##   9840        0.0579             nan     0.0010   -0.0000
##   9860        0.0576             nan     0.0010   -0.0000
##   9880        0.0573             nan     0.0010   -0.0000
##   9900        0.0570             nan     0.0010   -0.0000
##   9920        0.0568             nan     0.0010   -0.0000
##   9940        0.0565             nan     0.0010   -0.0000
##   9960        0.0562             nan     0.0010   -0.0000
##   9980        0.0558             nan     0.0010    0.0000
##  10000        0.0556             nan     0.0010   -0.0000
##  10020        0.0552             nan     0.0010    0.0000
##  10040        0.0549             nan     0.0010    0.0000
##  10060        0.0547             nan     0.0010   -0.0000
##  10080        0.0544             nan     0.0010   -0.0000
##  10100        0.0541             nan     0.0010   -0.0000
##  10120        0.0538             nan     0.0010   -0.0000
##  10140        0.0535             nan     0.0010   -0.0000
##  10160        0.0532             nan     0.0010   -0.0000
##  10180        0.0529             nan     0.0010   -0.0000
##  10200        0.0526             nan     0.0010   -0.0000
##  10220        0.0524             nan     0.0010   -0.0000
##  10240        0.0521             nan     0.0010   -0.0000
##  10260        0.0518             nan     0.0010   -0.0000
##  10280        0.0515             nan     0.0010   -0.0000
##  10300        0.0513             nan     0.0010   -0.0000
##  10320        0.0510             nan     0.0010   -0.0000
##  10340        0.0508             nan     0.0010   -0.0000
##  10360        0.0505             nan     0.0010   -0.0000
##  10380        0.0503             nan     0.0010   -0.0000
##  10400        0.0500             nan     0.0010   -0.0000
##  10420        0.0498             nan     0.0010   -0.0000
##  10440        0.0496             nan     0.0010   -0.0000
##  10460        0.0493             nan     0.0010   -0.0000
##  10480        0.0490             nan     0.0010   -0.0000
##  10500        0.0488             nan     0.0010   -0.0000
##  10520        0.0486             nan     0.0010   -0.0000
##  10540        0.0483             nan     0.0010   -0.0000
##  10560        0.0480             nan     0.0010    0.0000
##  10580        0.0478             nan     0.0010   -0.0000
##  10600        0.0475             nan     0.0010   -0.0000
##  10620        0.0473             nan     0.0010   -0.0000
##  10640        0.0471             nan     0.0010   -0.0000
##  10660        0.0469             nan     0.0010   -0.0000
##  10680        0.0466             nan     0.0010   -0.0000
##  10700        0.0464             nan     0.0010   -0.0000
##  10720        0.0462             nan     0.0010   -0.0000
##  10740        0.0459             nan     0.0010   -0.0000
##  10760        0.0457             nan     0.0010   -0.0000
##  10780        0.0455             nan     0.0010   -0.0000
##  10800        0.0452             nan     0.0010   -0.0000
##  10820        0.0450             nan     0.0010   -0.0000
##  10840        0.0448             nan     0.0010   -0.0000
##  10860        0.0445             nan     0.0010   -0.0000
##  10880        0.0442             nan     0.0010   -0.0000
##  10900        0.0440             nan     0.0010   -0.0000
##  10920        0.0438             nan     0.0010   -0.0000
##  10940        0.0436             nan     0.0010   -0.0000
##  10960        0.0434             nan     0.0010   -0.0000
##  10980        0.0432             nan     0.0010   -0.0000
##  11000        0.0430             nan     0.0010    0.0000
##  11020        0.0427             nan     0.0010   -0.0000
##  11040        0.0425             nan     0.0010   -0.0000
##  11060        0.0423             nan     0.0010   -0.0000
##  11080        0.0421             nan     0.0010   -0.0000
##  11100        0.0419             nan     0.0010   -0.0000
##  11120        0.0416             nan     0.0010   -0.0000
##  11140        0.0414             nan     0.0010   -0.0000
##  11160        0.0412             nan     0.0010   -0.0000
##  11180        0.0410             nan     0.0010   -0.0000
##  11200        0.0408             nan     0.0010   -0.0000
##  11220        0.0406             nan     0.0010   -0.0000
##  11240        0.0404             nan     0.0010   -0.0000
##  11260        0.0402             nan     0.0010   -0.0000
##  11280        0.0401             nan     0.0010   -0.0000
##  11300        0.0399             nan     0.0010   -0.0000
##  11320        0.0397             nan     0.0010   -0.0000
##  11340        0.0395             nan     0.0010   -0.0000
##  11360        0.0393             nan     0.0010   -0.0000
##  11380        0.0391             nan     0.0010   -0.0000
##  11400        0.0389             nan     0.0010   -0.0000
##  11420        0.0387             nan     0.0010   -0.0000
##  11440        0.0385             nan     0.0010   -0.0000
##  11460        0.0383             nan     0.0010   -0.0000
##  11480        0.0381             nan     0.0010   -0.0000
##  11500        0.0379             nan     0.0010    0.0000
##  11520        0.0378             nan     0.0010   -0.0000
##  11540        0.0376             nan     0.0010   -0.0000
##  11560        0.0374             nan     0.0010   -0.0000
##  11580        0.0372             nan     0.0010   -0.0000
##  11600        0.0370             nan     0.0010   -0.0000
##  11620        0.0368             nan     0.0010   -0.0000
##  11640        0.0366             nan     0.0010   -0.0000
##  11660        0.0364             nan     0.0010   -0.0000
##  11680        0.0362             nan     0.0010   -0.0000
##  11700        0.0361             nan     0.0010   -0.0000
##  11720        0.0359             nan     0.0010   -0.0000
##  11740        0.0357             nan     0.0010   -0.0000
##  11760        0.0355             nan     0.0010    0.0000
##  11780        0.0353             nan     0.0010   -0.0000
##  11800        0.0351             nan     0.0010   -0.0000
##  11820        0.0349             nan     0.0010   -0.0000
##  11840        0.0348             nan     0.0010   -0.0000
##  11860        0.0346             nan     0.0010   -0.0000
##  11880        0.0344             nan     0.0010   -0.0000
##  11900        0.0342             nan     0.0010   -0.0000
##  11920        0.0341             nan     0.0010   -0.0000
##  11940        0.0339             nan     0.0010   -0.0000
##  11960        0.0337             nan     0.0010   -0.0000
##  11980        0.0335             nan     0.0010   -0.0000
##  12000        0.0334             nan     0.0010   -0.0000
##  12020        0.0332             nan     0.0010   -0.0000
##  12040        0.0330             nan     0.0010   -0.0000
##  12060        0.0329             nan     0.0010   -0.0000
##  12080        0.0327             nan     0.0010   -0.0000
##  12100        0.0325             nan     0.0010   -0.0000
##  12120        0.0324             nan     0.0010   -0.0000
##  12140        0.0322             nan     0.0010   -0.0000
##  12160        0.0321             nan     0.0010   -0.0000
##  12180        0.0319             nan     0.0010   -0.0000
##  12200        0.0317             nan     0.0010    0.0000
##  12220        0.0316             nan     0.0010   -0.0000
##  12240        0.0315             nan     0.0010   -0.0000
##  12260        0.0313             nan     0.0010   -0.0000
##  12280        0.0312             nan     0.0010   -0.0000
##  12300        0.0310             nan     0.0010   -0.0000
##  12320        0.0309             nan     0.0010   -0.0000
##  12340        0.0308             nan     0.0010   -0.0000
##  12360        0.0306             nan     0.0010   -0.0000
##  12380        0.0305             nan     0.0010   -0.0000
##  12400        0.0303             nan     0.0010   -0.0000
##  12420        0.0302             nan     0.0010   -0.0000
##  12440        0.0300             nan     0.0010   -0.0000
##  12460        0.0299             nan     0.0010   -0.0000
##  12480        0.0297             nan     0.0010   -0.0000
##  12500        0.0295             nan     0.0010   -0.0000
##  12520        0.0294             nan     0.0010    0.0000
##  12540        0.0293             nan     0.0010   -0.0000
##  12560        0.0291             nan     0.0010   -0.0000
##  12580        0.0290             nan     0.0010   -0.0000
##  12600        0.0288             nan     0.0010   -0.0000
##  12620        0.0287             nan     0.0010   -0.0000
##  12640        0.0286             nan     0.0010   -0.0000
##  12660        0.0284             nan     0.0010   -0.0000
##  12680        0.0283             nan     0.0010    0.0000
##  12700        0.0281             nan     0.0010   -0.0000
##  12720        0.0280             nan     0.0010   -0.0000
##  12740        0.0278             nan     0.0010   -0.0000
##  12760        0.0277             nan     0.0010   -0.0000
##  12780        0.0276             nan     0.0010   -0.0000
##  12800        0.0275             nan     0.0010   -0.0000
##  12820        0.0274             nan     0.0010   -0.0000
##  12840        0.0272             nan     0.0010   -0.0000
##  12860        0.0271             nan     0.0010   -0.0000
##  12880        0.0270             nan     0.0010    0.0000
##  12900        0.0269             nan     0.0010   -0.0000
##  12920        0.0267             nan     0.0010   -0.0000
##  12940        0.0266             nan     0.0010   -0.0000
##  12960        0.0265             nan     0.0010   -0.0000
##  12980        0.0264             nan     0.0010   -0.0000
##  13000        0.0262             nan     0.0010   -0.0000
##  13020        0.0261             nan     0.0010   -0.0000
##  13040        0.0260             nan     0.0010   -0.0000
##  13060        0.0259             nan     0.0010   -0.0000
##  13080        0.0258             nan     0.0010   -0.0000
##  13100        0.0256             nan     0.0010   -0.0000
##  13120        0.0255             nan     0.0010   -0.0000
##  13140        0.0254             nan     0.0010   -0.0000
##  13160        0.0253             nan     0.0010   -0.0000
##  13180        0.0251             nan     0.0010   -0.0000
##  13200        0.0250             nan     0.0010   -0.0000
##  13220        0.0249             nan     0.0010   -0.0000
##  13240        0.0247             nan     0.0010   -0.0000
##  13260        0.0246             nan     0.0010   -0.0000
##  13280        0.0245             nan     0.0010   -0.0000
##  13300        0.0244             nan     0.0010   -0.0000
##  13320        0.0243             nan     0.0010   -0.0000
##  13340        0.0242             nan     0.0010   -0.0000
##  13360        0.0240             nan     0.0010   -0.0000
##  13380        0.0239             nan     0.0010   -0.0000
##  13400        0.0238             nan     0.0010   -0.0000
##  13420        0.0237             nan     0.0010   -0.0000
##  13440        0.0235             nan     0.0010   -0.0000
##  13460        0.0234             nan     0.0010   -0.0000
##  13480        0.0233             nan     0.0010   -0.0000
##  13500        0.0232             nan     0.0010   -0.0000
##  13520        0.0231             nan     0.0010   -0.0000
##  13540        0.0230             nan     0.0010   -0.0000
##  13560        0.0228             nan     0.0010   -0.0000
##  13580        0.0227             nan     0.0010   -0.0000
##  13600        0.0226             nan     0.0010   -0.0000
##  13620        0.0225             nan     0.0010   -0.0000
##  13640        0.0224             nan     0.0010   -0.0000
##  13660        0.0223             nan     0.0010   -0.0000
##  13680        0.0222             nan     0.0010   -0.0000
##  13700        0.0221             nan     0.0010   -0.0000
##  13720        0.0220             nan     0.0010    0.0000
##  13740        0.0219             nan     0.0010   -0.0000
##  13760        0.0218             nan     0.0010   -0.0000
##  13780        0.0217             nan     0.0010   -0.0000
##  13800        0.0215             nan     0.0010   -0.0000
##  13820        0.0214             nan     0.0010   -0.0000
##  13840        0.0213             nan     0.0010   -0.0000
##  13860        0.0212             nan     0.0010   -0.0000
##  13880        0.0211             nan     0.0010    0.0000
##  13900        0.0210             nan     0.0010   -0.0000
##  13920        0.0209             nan     0.0010   -0.0000
##  13940        0.0208             nan     0.0010   -0.0000
##  13960        0.0207             nan     0.0010   -0.0000
##  13980        0.0206             nan     0.0010   -0.0000
##  14000        0.0205             nan     0.0010   -0.0000
##  14020        0.0204             nan     0.0010   -0.0000
##  14040        0.0203             nan     0.0010   -0.0000
##  14060        0.0202             nan     0.0010   -0.0000
##  14080        0.0201             nan     0.0010   -0.0000
##  14100        0.0200             nan     0.0010    0.0000
##  14120        0.0199             nan     0.0010   -0.0000
##  14140        0.0198             nan     0.0010   -0.0000
##  14160        0.0197             nan     0.0010   -0.0000
##  14180        0.0196             nan     0.0010   -0.0000
##  14200        0.0195             nan     0.0010   -0.0000
##  14220        0.0194             nan     0.0010   -0.0000
##  14240        0.0193             nan     0.0010   -0.0000
##  14260        0.0192             nan     0.0010   -0.0000
##  14280        0.0191             nan     0.0010   -0.0000
##  14300        0.0191             nan     0.0010   -0.0000
##  14320        0.0190             nan     0.0010   -0.0000
##  14340        0.0189             nan     0.0010   -0.0000
##  14360        0.0188             nan     0.0010   -0.0000
##  14380        0.0187             nan     0.0010   -0.0000
##  14400        0.0186             nan     0.0010   -0.0000
##  14420        0.0185             nan     0.0010   -0.0000
##  14440        0.0184             nan     0.0010   -0.0000
##  14460        0.0183             nan     0.0010   -0.0000
##  14480        0.0183             nan     0.0010   -0.0000
##  14500        0.0182             nan     0.0010   -0.0000
##  14520        0.0181             nan     0.0010   -0.0000
##  14540        0.0180             nan     0.0010   -0.0000
##  14560        0.0179             nan     0.0010   -0.0000
##  14580        0.0178             nan     0.0010   -0.0000
##  14600        0.0177             nan     0.0010   -0.0000
##  14620        0.0176             nan     0.0010   -0.0000
##  14640        0.0176             nan     0.0010   -0.0000
##  14660        0.0175             nan     0.0010   -0.0000
##  14680        0.0174             nan     0.0010    0.0000
##  14700        0.0173             nan     0.0010   -0.0000
##  14720        0.0172             nan     0.0010   -0.0000
##  14740        0.0171             nan     0.0010   -0.0000
##  14760        0.0171             nan     0.0010   -0.0000
##  14780        0.0170             nan     0.0010   -0.0000
##  14800        0.0169             nan     0.0010   -0.0000
##  14820        0.0168             nan     0.0010   -0.0000
##  14840        0.0167             nan     0.0010   -0.0000
##  14860        0.0167             nan     0.0010   -0.0000
##  14880        0.0166             nan     0.0010   -0.0000
##  14900        0.0165             nan     0.0010   -0.0000
##  14920        0.0164             nan     0.0010   -0.0000
##  14940        0.0163             nan     0.0010   -0.0000
##  14960        0.0162             nan     0.0010   -0.0000
##  14980        0.0162             nan     0.0010   -0.0000
##  15000        0.0161             nan     0.0010   -0.0000
# Persist the fitted model so later sections can reload it instead of refitting.
save(gbmtest, file = "gbmtest.RCa")
# Number of trees chosen by cross-validation (the script sets cv.folds to 10
# before fitting); the other gbm.perf methods will over or under predict.
best.iter <- gbm.perf(gbmtest, method = "cv", plot.it = FALSE)
print(best.iter)
## [1] 2897
# Training error recorded by gbm after each tree, paired with the tree index.
gbm_error <- data.frame(
  train.error = gbmtest$train.error,
  trees = seq_len(n.trees)
)
deviance_plot <- ggplot(gbm_error, aes(x = trees, y = train.error)) +
  geom_line()
deviance_plot

ggsave(filename = "deviance_enviro_vector.jpg", plot = deviance_plot)
## Saving 7 x 5 in image
# Stop the clock: time elapsed since ptm was recorded, converted from seconds
# to minutes (user CPU, system CPU and wall-clock "elapsed").
(proc.time()-ptm)/60
##        user      system     elapsed 
## 0.063916667 0.005033333 0.202616667
# Reload the saved model and recompute the number of trees selected by
# cross-validation (other gbm.perf methods over or under predict).
load("gbmtest.RCa")
best.iter <- gbm.perf(gbmtest, method = "cv", plot.it = FALSE)

# Predictions on the TRAINING SET, on the response scale, using only the
# first best.iter trees.
train_prob <- predict(gbmtest,
                      newdata = Train,
                      n.trees = best.iter,
                      type = "response")

# Prediction next to the observed label, one row per training row, sorted from
# the highest to the lowest prediction.
train_scores <- cbind(train_prob, Train$Species_no)
dimnames(train_scores) <- list(rownames(Train), c("output", "data"))
train_scores <- train_scores[order(-train_scores[, 1]), ]

# AUC for Bernoulli distributed responses; plotROC = TRUE also draws the curve.
par(mar = c(1, 1, 1, 1))
train_auc <- colAUC(train_scores[, 1], train_scores[, 2], plotROC = TRUE)

print(train_auc)
##         [,1]
## 0 vs. 1    1
train_pred <- prediction(train_scores[, 1], train_scores[, 2])
train_perf <- performance(train_pred, measure = "tpr", x.measure = "fpr")

# ROC curve with the diagonal (chance) line for reference.
par(mar = c(1, 1, 1, 1))
plot(train_perf, colorize = TRUE, main = "ROC full model")
abline(a = 0, b = 1)

# Predictions on the held-out Test SET, on the response scale, using the first
# best.iter trees.
output <- predict(gbmtest, newdata = Test, n.trees = best.iter, type = "response")
# Hard 0/1 call per test row; read again by the confusion matrix below.
Test$pred <- round(output)

# Prediction next to the observed label, one row per test row, sorted from the
# highest to the lowest prediction.
output <- cbind(output, Test$Species_no)
dimnames(output) <- list(rownames(Test), c("output", "data"))
output <- output[order(-output[, 1]), ]

# AUC for Bernoulli distributed responses; plotROC = TRUE also draws the curve.
par(mar = c(1, 1, 1, 1))
auc <- colAUC(output[, 1], output[, 2], plotROC = TRUE)

print(auc)
##              [,1]
## 0 vs. 1 0.6041667
pred <- prediction(output[, 1], output[, 2])
perf <- performance(pred, measure = "tpr", x.measure = "fpr")

par(mar = c(1, 1, 1, 1))
plot(perf, colorize = TRUE, main = "ROC full model test data")

###confusion matrix – rodents – GBM

# confusion matrix -- Test set
# Both factors get the same explicit levels. Deriving the levels from the data
# (factor(x)) gives the two factors different level sets whenever the model
# predicts only one class, which is plausible on a test set this small.
# NOTE(review): caret takes the first level ("0") as the positive class, so the
# sensitivity/specificity printed below describe class 0; pass positive = "1"
# if the class coded 1 is the one of interest.
confusionMatrix(data = factor(Test$pred, levels = c(0, 1)),
                reference = factor(Test$Species_no, levels = c(0, 1)),
                mode = "everything")
## Confusion Matrix and Statistics
## 
##           Reference
## Prediction 0 1
##          0 4 4
##          1 2 4
##                                           
##                Accuracy : 0.5714          
##                  95% CI : (0.2886, 0.8234)
##     No Information Rate : 0.5714          
##     P-Value [Acc > NIR] : 0.6105          
##                                           
##                   Kappa : 0.16            
##                                           
##  Mcnemar's Test P-Value : 0.6831          
##                                           
##             Sensitivity : 0.6667          
##             Specificity : 0.5000          
##          Pos Pred Value : 0.5000          
##          Neg Pred Value : 0.6667          
##               Precision : 0.5000          
##                  Recall : 0.6667          
##                      F1 : 0.5714          
##              Prevalence : 0.4286          
##          Detection Rate : 0.2857          
##    Detection Prevalence : 0.5714          
##       Balanced Accuracy : 0.5833          
##                                           
##        'Positive' Class : 0               
## 
# inds_observed_1_predicted_0 = which(Test$Species_no == 1 & Test$pred ==0)
# 
# Test$Matches[inds_observed_1_predicted_0]
# 
# inds_observed_0_predicted_1 = which(Test$Species_no == 0 & Test$pred ==1)
# Test$Matches[inds_observed_0_predicted_1]

# Predictions on the Train SET, on the response scale, using the first
# best.iter trees, then rounded to a hard 0/1 call per row.
output <- predict(gbmtest,
                  newdata = Train,
                  n.trees = best.iter,
                  type = "response")
Train$pred <- round(output)
# confusion matrix -- Train set
# Both factors get the same explicit levels so the table stays 2 x 2 even if
# only one class is ever predicted (factor(x) alone would drop the other level).
confusionMatrix(data = factor(Train$pred, levels = c(0, 1)),
                reference = factor(Train$Species_no, levels = c(0, 1)),
                mode = "everything")
## Confusion Matrix and Statistics
## 
##           Reference
## Prediction  0  1
##          0 30  0
##          1  0 26
##                                      
##                Accuracy : 1          
##                  95% CI : (0.9362, 1)
##     No Information Rate : 0.5357     
##     P-Value [Acc > NIR] : 6.611e-16  
##                                      
##                   Kappa : 1          
##                                      
##  Mcnemar's Test P-Value : NA         
##                                      
##             Sensitivity : 1.0000     
##             Specificity : 1.0000     
##          Pos Pred Value : 1.0000     
##          Neg Pred Value : 1.0000     
##               Precision : 1.0000     
##                  Recall : 1.0000     
##                      F1 : 1.0000     
##              Prevalence : 0.5357     
##          Detection Rate : 0.5357     
##    Detection Prevalence : 0.5357     
##       Balanced Accuracy : 1.0000     
##                                      
##        'Positive' Class : 0          
## 

###permute labels and find AUC – bootstrap

# Null distribution of the test AUC: shuffle the species labels, refit the
# model on a fresh 80/20 split and record the test AUC, repeated
# n_permutations times.
load("C.Rdata")

#Start the clock
ptm <- proc.time()

word <- "binomial" # tag used in the file name written after the loop

n_permutations <- 50
# A split is redrawn when some training column has fewer than two non-missing
# values. Cap the number of draws so that data in which this always happens
# stops with an error instead of looping forever.
max_attempts <- 10 * n_permutations

# Preallocated with NA so that unfilled slots show up in the NA count that
# follows the loop.
permutedAUC <- rep(NA_real_, n_permutations)
best.iter.list <- rep(NA_integer_, n_permutations)

attempts <- 0
i <- 1
while (i <= n_permutations) {
  attempts <- attempts + 1
  if (attempts > max_attempts) {
    stop("no usable training split after ", max_attempts,
         " attempts; check C for columns with fewer than 2 non-missing values",
         call. = FALSE)
  }

  ## random permutation of the label; the true label is dropped
  randomLabel <- sample(C$Species_no)
  pan2 <- cbind(randomLabel, C)
  pan2 <- pan2[, setdiff(names(pan2), "Species_no")]
  # stored as character, exactly as before (vectorised instead of sapply)
  pan2[, 1] <- as.character(pan2[, 1])

  ## create training and test sets
  intrain2 <- createDataPartition(y = pan2$randomLabel,
                                  p = 0.8,
                                  list = FALSE)
  test2 <- pan2[-intrain2, ]
  training2 <- pan2[intrain2, ]

  # every training column needs at least two non-missing values
  if (!all(colSums(!is.na(training2)) >= 2)) {
    next # redraw; i is not advanced
  }

  ## permuted label ~ all remaining columns
  predictors <- setdiff(names(training2), "randomLabel")
  model <- as.formula(paste0("randomLabel", "~",
                             paste(predictors, collapse = "+")))

  gbm2 <- gbm(model,
              data = training2,
              distribution = "bernoulli",
              n.trees = 15000,
              shrinkage = 0.001,
              interaction.depth = 3,
              bag.fraction = 0.50,
              train.fraction = 1,
              n.minobsinnode = 3,
              cv.folds = 10,
              keep.data = TRUE)

  # number of trees chosen by the 10-fold cross-validation requested above
  # (the OOB method under predicts)
  best.iter2 <- gbm.perf(gbm2, method = "cv", plot.it = FALSE)
  best.iter.list[i] <- best.iter2

  ## training AUC for Bernoulli distributed responses (computed, not stored)
  train_prob <- predict(gbm2, newdata = training2, n.trees = best.iter2,
                        type = "response")
  auc2 <- colAUC(train_prob, as.numeric(training2$randomLabel))

  ## test AUC for Bernoulli distributed responses
  test_prob <- predict(gbm2, newdata = test2, n.trees = best.iter2,
                       type = "response")
  auctest2 <- colAUC(test_prob, as.numeric(test2$randomLabel))

  permutedAUC[i] <- auctest2
  print(auctest2)
  i <- i + 1
  print(i) # progress: index of the next permutation
}
##             [,1]
## 0 vs. 1 0.547619
## [1] 2
##              [,1]
## 0 vs. 1 0.5714286
## [1] 3
##              [,1]
## 0 vs. 1 0.6666667
## [1] 4
##              [,1]
## 0 vs. 1 0.9047619
## [1] 5
##              [,1]
## 0 vs. 1 0.6666667
## [1] 6
##              [,1]
## 0 vs. 1 0.6071429
## [1] 7
##              [,1]
## 0 vs. 1 0.7142857
## [1] 8
##              [,1]
## 0 vs. 1 0.7619048
## [1] 9
##         [,1]
## 0 vs. 1  0.5
## [1] 10
##              [,1]
## 0 vs. 1 0.5714286
## [1] 11
##              [,1]
## 0 vs. 1 0.5952381
## [1] 12
##             [,1]
## 0 vs. 1 0.547619
## [1] 13
##              [,1]
## 0 vs. 1 0.7857143
## [1] 14
##              [,1]
## 0 vs. 1 0.6428571
## [1] 15
##              [,1]
## 0 vs. 1 0.6190476
## [1] 16
##             [,1]
## 0 vs. 1 0.547619
## [1] 17
##              [,1]
## 0 vs. 1 0.5952381
## [1] 18
##              [,1]
## 0 vs. 1 0.5238095
## [1] 19
##              [,1]
## 0 vs. 1 0.5952381
## [1] 20
##              [,1]
## 0 vs. 1 0.5357143
## [1] 21
##             [,1]
## 0 vs. 1 0.547619
## [1] 22
##              [,1]
## 0 vs. 1 0.5714286
## [1] 23
##              [,1]
## 0 vs. 1 0.6190476
## [1] 24
##              [,1]
## 0 vs. 1 0.7261905
## [1] 25
##         [,1]
## 0 vs. 1  0.5
## [1] 26
##              [,1]
## 0 vs. 1 0.5238095
## [1] 27
##              [,1]
## 0 vs. 1 0.6666667
## [1] 28
##         [,1]
## 0 vs. 1  0.5
## [1] 29
##              [,1]
## 0 vs. 1 0.5952381
## [1] 30
##             [,1]
## 0 vs. 1 0.547619
## [1] 31
##              [,1]
## 0 vs. 1 0.6666667
## [1] 32
##              [,1]
## 0 vs. 1 0.5714286
## [1] 33
##         [,1]
## 0 vs. 1  0.5
## [1] 34
##              [,1]
## 0 vs. 1 0.6190476
## [1] 35
##              [,1]
## 0 vs. 1 0.8095238
## [1] 36
##             [,1]
## 0 vs. 1 0.547619
## [1] 37
##         [,1]
## 0 vs. 1  0.5
## [1] 38
##              [,1]
## 0 vs. 1 0.6666667
## [1] 39
##             [,1]
## 0 vs. 1 0.702381
## [1] 40
##             [,1]
## 0 vs. 1 0.547619
## [1] 41
##              [,1]
## 0 vs. 1 0.7380952
## [1] 42
##              [,1]
## 0 vs. 1 0.5714286
## [1] 43
##              [,1]
## 0 vs. 1 0.5595238
## [1] 44
##              [,1]
## 0 vs. 1 0.6190476
## [1] 45
##         [,1]
## 0 vs. 1  0.5
## [1] 46
##              [,1]
## 0 vs. 1 0.5238095
## [1] 47
##              [,1]
## 0 vs. 1 0.5238095
## [1] 48
##              [,1]
## 0 vs. 1 0.6190476
## [1] 49
##              [,1]
## 0 vs. 1 0.7261905
## [1] 50
##              [,1]
## 0 vs. 1 0.6190476
## [1] 51
# Number of permutations without a test AUC
sum(is.na(permutedAUC))
## [1] 0
# Mean and standard deviation of the test AUC over the permutations that
# produced one
permutedAUC2 <- na.omit(permutedAUC)
mean(permutedAUC2)
## [1] 0.6085714
sd(permutedAUC2)
## [1] 0.09112981
# Stop the clock: time since ptm, in minutes
(proc.time() - ptm) / 60
##      user    system   elapsed 
## 2.0804500 0.1201333 9.0096667
# Keep the number of trees selected in each permutation run
best_iter_file <- paste0("best.iter.list.", "AUC.", word, ".csv")
write.csv(best.iter.list, file = best_iter_file)

###plot relative influence

# Reload the model under the name it was saved with earlier in this script
# (save(gbmtest, file = "gbmtest.RCa")). "gbmtest.Rdata" is never written by
# this script, so loading it either fails or silently picks up a stale model.
load("gbmtest.RCa")
# Relative influence is taken from the same cross-validation-selected number of
# trees that the predictions use, not from all fitted trees. plotit = FALSE
# because the ggplot below replaces summary()'s base-graphics bar chart.
best.iter <- gbm.perf(gbmtest, method = "cv", plot.it = FALSE)
x <- summary(gbmtest, n.trees = best.iter, plotit = FALSE)

x.df <- data.frame(variable = x$var,
                   relative.influence = x$rel.inf)

# how many variables have no influence at all
x.df.0 <- subset(x.df, relative.influence == 0)
nrow(x.df.0)
# NOTE(review): the recorded count below comes from an earlier run (all trees,
# model loaded from gbmtest.Rdata); re-run to refresh it.
## [1] 6
x.df <- subset(x.df, relative.influence >= 1) # take only interesting variables

# order the bars by influence
x.df$variable <- factor(x.df$variable,
                        levels = x.df$variable[order(x.df$relative.influence)])
save(x.df, file = "x.df.Rdata")
rel_inf_plot <- ggplot(data = x.df, aes(x = variable, y = relative.influence)) +
  ylab("relative influence (%)") +
  xlab("variable") +
  geom_bar(stat = "identity") +
  coord_flip()
rel_inf_plot

# pass the plot explicitly instead of relying on the last plot displayed
ggsave("Figure.relative.influence.jpg", plot = rel_inf_plot)
## Saving 7 x 5 in image

##Repeated random train/test splits for the distribution of relative influence

# Distribution of relative influence and AUC over repeated random 80/20
# train/test splits of the real (unpermuted) labels.
load("C.Rdata")
df <- C
#Start the clock
ptm <- proc.time()

bootstrap_runs <- 50
# A split is redrawn when some training column has fewer than two non-missing
# values. Cap the number of draws so that data in which this always happens
# stops with an error instead of looping forever.
max_attempts <- 10 * bootstrap_runs

# One slot per run, preallocated; NA marks a run that never completed.
permutedAUC <- rep(NA_real_, bootstrap_runs)
permutedAUC_train <- rep(NA_real_, bootstrap_runs)
best.iter2.list <- rep(NA_integer_, bootstrap_runs)
list_save <- vector("list", bootstrap_runs)      # fitted model of each run
intrain_list <- vector("list", bootstrap_runs)   # training row indices of each run
influence_list <- vector("list", bootstrap_runs) # relative influence of each run

attempts <- 0
i <- 1
while (i <= bootstrap_runs) {
  attempts <- attempts + 1
  if (attempts > max_attempts) {
    stop("no usable training split after ", max_attempts,
         " attempts; check C for columns with fewer than 2 non-missing values",
         call. = FALSE)
  }

  ## create training and test sets
  pan2 <- df
  intrain2 <- createDataPartition(y = pan2$Species_no,
                                  p = 0.8,
                                  list = FALSE)
  test2 <- pan2[-intrain2, ]
  training2 <- pan2[intrain2, ]
  intrain_list[[i]] <- intrain2 # overwritten if this split is redrawn

  # every training column needs at least two non-missing values
  if (!all(colSums(!is.na(training2)) >= 2)) {
    next # redraw; i is not advanced
  }

  ## label ~ all remaining columns
  predictors <- setdiff(names(training2), "Species_no")
  model <- as.formula(paste0("Species_no", "~",
                             paste(predictors, collapse = "+")))

  # shrinkage is 0.001 to match the main model and the label-permutation runs;
  # this loop previously used 0.01, so its influence distribution described a
  # differently tuned model.
  gbm2 <- gbm(model,
              data = training2,
              distribution = "bernoulli",
              n.trees = 15000,
              shrinkage = 0.001,
              interaction.depth = 3,
              bag.fraction = 0.50,
              train.fraction = 1,
              n.minobsinnode = 3,
              cv.folds = 10,
              keep.data = TRUE)

  list_save[[i]] <- gbm2

  # number of trees chosen by the 10-fold cross-validation requested above
  # (the OOB method under predicts)
  best.iter2 <- gbm.perf(gbm2, method = "cv", plot.it = FALSE)
  best.iter2.list[i] <- best.iter2

  # relative influence from the same best.iter2 trees used for the predictions
  # below; plotit = FALSE avoids drawing one bar chart per run
  x <- summary(gbm2, n.trees = best.iter2, plotit = FALSE)
  x.df <- data.frame(variable = x$var,
                     relative.influence = x$rel.inf)
  x.df$variable <- factor(x.df$variable,
                          levels = x.df$variable[order(x.df$relative.influence)])
  x.df$i <- i
  influence_list[[i]] <- x.df

  ## training AUC for Bernoulli distributed responses
  train_prob <- predict(gbm2, newdata = training2, n.trees = best.iter2,
                        type = "response")
  auc2 <- colAUC(train_prob, as.numeric(training2$Species_no))
  permutedAUC_train[i] <- auc2

  ## test AUC for Bernoulli distributed responses
  test_prob <- predict(gbm2, newdata = test2, n.trees = best.iter2,
                       type = "response")
  auctest2 <- colAUC(test_prob, as.numeric(test2$Species_no))

  permutedAUC[i] <- auctest2
  print(auctest2)
  i <- i + 1
  print(i) # progress: index of the next run
}

# all runs stacked into one data frame; column i identifies the run
out <- do.call(rbind, influence_list)

##              [,1]
## 0 vs. 1 0.7959184
## [1] 2

##              [,1]
## 0 vs. 1 0.8666667
## [1] 3

##              [,1]
## 0 vs. 1 0.7708333
## [1] 4

##              [,1]
## 0 vs. 1 0.5777778
## [1] 5

##              [,1]
## 0 vs. 1 0.7083333
## [1] 6

##              [,1]
## 0 vs. 1 0.9583333
## [1] 7

##          [,1]
## 0 vs. 1 0.875
## [1] 8

##              [,1]
## 0 vs. 1 0.8333333
## [1] 9

##              [,1]
## 0 vs. 1 0.6734694
## [1] 10

##          [,1]
## 0 vs. 1 0.675
## [1] 11

##              [,1]
## 0 vs. 1 0.5208333
## [1] 12

##              [,1]
## 0 vs. 1 0.8571429
## [1] 13

##              [,1]
## 0 vs. 1 0.7111111
## [1] 14

##              [,1]
## 0 vs. 1 0.6888889
## [1] 15

##         [,1]
## 0 vs. 1 0.75
## [1] 16

##              [,1]
## 0 vs. 1 0.8444444
## [1] 17

##              [,1]
## 0 vs. 1 0.8444444
## [1] 18

##         [,1]
## 0 vs. 1  0.8
## [1] 19

##              [,1]
## 0 vs. 1 0.5714286
## [1] 20

##          [,1]
## 0 vs. 1 0.625
## [1] 21

##              [,1]
## 0 vs. 1 0.6326531
## [1] 22

##              [,1]
## 0 vs. 1 0.6530612
## [1] 23

##              [,1]
## 0 vs. 1 0.8333333
## [1] 24

##              [,1]
## 0 vs. 1 0.6041667
## [1] 25

##         [,1]
## 0 vs. 1 0.75
## [1] 26

##              [,1]
## 0 vs. 1 0.7708333
## [1] 27

##              [,1]
## 0 vs. 1 0.6666667
## [1] 28

##           [,1]
## 0 vs. 1 0.6875
## [1] 29

##              [,1]
## 0 vs. 1 0.7346939
## [1] 30

##           [,1]
## 0 vs. 1 0.6875
## [1] 31

##         [,1]
## 0 vs. 1 0.75
## [1] 32

##              [,1]
## 0 vs. 1 0.7708333
## [1] 33

##         [,1]
## 0 vs. 1  0.8
## [1] 34

##              [,1]
## 0 vs. 1 0.7142857
## [1] 35

##              [,1]
## 0 vs. 1 0.7755102
## [1] 36

##              [,1]
## 0 vs. 1 0.6938776
## [1] 37

##              [,1]
## 0 vs. 1 0.7111111
## [1] 38

##         [,1]
## 0 vs. 1 0.75
## [1] 39

##         [,1]
## 0 vs. 1    1
## [1] 40

##           [,1]
## 0 vs. 1 0.6875
## [1] 41

##              [,1]
## 0 vs. 1 0.6041667
## [1] 42

##         [,1]
## 0 vs. 1 0.95
## [1] 43

##              [,1]
## 0 vs. 1 0.8444444
## [1] 44

##             [,1]
## 0 vs. 1 0.755102
## [1] 45

##           [,1]
## 0 vs. 1 0.8125
## [1] 46

##              [,1]
## 0 vs. 1 0.8541667
## [1] 47

##              [,1]
## 0 vs. 1 0.8571429
## [1] 48

##              [,1]
## 0 vs. 1 0.5918367
## [1] 49

##           [,1]
## 0 vs. 1 0.9375
## [1] 50

##         [,1]
## 0 vs. 1 0.65
## [1] 51
# Keep the training row indices of every run
save(intrain_list, file = "intrain_list_presence.Rdata")

# Test AUC over the runs: number of missing values, then mean and sd of the rest
sum(is.na(permutedAUC))
## [1] 0
permutedAUC2 <- na.omit(permutedAUC)
mean(permutedAUC2)
## [1] 0.7495669
sd(permutedAUC2)
## [1] 0.1079915
# Same summary for the training AUC
sum(is.na(permutedAUC_train))
## [1] 0
permutedAUC2_train <- na.omit(permutedAUC_train)
mean(permutedAUC2_train)
## [1] 0.9992828
sd(permutedAUC2_train)
## [1] 0.001489297
# Keep the number of trees selected in each run
save(best.iter2.list, file = "best.iter2.list.binomial.rel.inf.Rdata")

# Stop the clock: time since ptm, in minutes
(proc.time() - ptm) / 60
##     user   system  elapsed 
## 2.543850 0.118100 8.913083
##     user   system  elapsed 
## 2.543850 0.118100 8.913083
# Mean relative influence of each variable across runs, keeping only the
# variables whose mean exceeds 1%
influence_by_variable <- group_by(out, variable)
mean_by_variable <- summarize(influence_by_variable,
                              mean_influence = mean(relative.influence))
out_sum <- filter(mean_by_variable, mean_influence > 1)

# Per-run rows for just those variables
out_high <- out[out$variable %in% out_sum$variable, ]

save(out_high, file = "out_high.Rdata")
load("out_high.Rdata")
# One horizontal box per variable showing its influence across runs
plot <- ggplot(data = out_high, aes(x = variable, y = relative.influence)) +
  geom_boxplot() +
  coord_flip() +
  xlab("variable") +
  ylab("relative influence (%)")

ggsave(filename = "Figure.relative.influence.boxplot.jpg", plot = plot)
## Saving 7 x 5 in image
# Keep the full per-run influence table and the fitted models
save(out, file = "rel.inf.presence.Rdata")
save(list_save, file = "list_save_presence.Rdata")

load("list_save_presence.Rdata")